/*
 * FindBugs - Find Bugs in Java programs
 * Copyright (C) 2006, University of Maryland
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307, USA
 */

package edu.umd.cs.findbugs.util;

import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.commons.lang.StringEscapeUtils;

/**
 * A class for static String utility methods.
 *
 * <p>
 * All state held by this class is immutable and created during class
 * initialization, so every method may be called from any thread.
 * </p>
 *
 * @author Brian Cole
 */
public class Strings {

    /**
     * Replaces every occurrence of a literal substring. This simply delegates
     * to <code>source.replace(find, repl)</code>; it was originally provided
     * for JDK 1.4 compatibility.
     *
     * <p>
     * An empty <code>find</code> is not rejected: as with
     * <code>String.replace</code>, <code>repl</code> is then inserted before
     * every character and at the end of <code>source</code>.
     * </p>
     *
     * @param source
     *            The String on which to operate
     * @param find
     *            The literal substring to be replaced
     * @param repl
     *            The literal replacement substring
     * @return The resultant String after substitution
     * @throws NullPointerException
     *             if any of the arguments are null
     * @see java.lang.String#replace(CharSequence target, CharSequence
     *      replacement)
     * @deprecated call {@link String#replace(CharSequence, CharSequence)}
     *             directly.
     */
    @Deprecated
    public static String replace(String source, String find, String repl) {
        return source.replace(find, repl);
    }

    /**
     * Returns a string representation of the given array. This simply
     * delegates to <code>Arrays.toString(a)</code>; it was originally provided
     * for JDK 1.4 compatibility. Each element is rendered with
     * String.valueOf(), so this won't work well for multi-dimensional arrays.
     *
     * @param a
     *            the array to render
     * @return the string representation of <code>a</code>
     * @see java.lang.String#valueOf(Object)
     * @see java.util.Arrays#toString(Object[])
     * @see java.util.Arrays#deepToString(Object[])
     * @deprecated call {@link Arrays#toString(Object[])} directly.
     */
    @Deprecated
    public static String toString(final Object[] a) {
        return Arrays.toString(a);
    }

    /**
     * Trim trailing comma from given string. At most one comma is removed.
     *
     * @param s
     *            a string
     * @return the same string with trailing comma trimmed (if any)
     * @throws NullPointerException
     *             if <code>s</code> is null
     */
    public static String trimComma(String s) {
        if (s.endsWith(",")) {
            return s.substring(0, s.length() - 1);
        }
        return s;
    }

    /*
     * XML escape/unescape routines: extend functionality of
     * org.apache.commons.lang.{un,}escapeXml() by supporting low-value
     * character escaping/unescaping
     */

    /** Characters below this code point are control characters. */
    private static final int XML_ALLOWED_LOW_CHARACTER_BOUND = 0x20;

    /**
     * Escape sequence for each code point below
     * {@link #XML_ALLOWED_LOW_CHARACTER_BOUND}, indexed by code point. Entries
     * for the control characters that XML permits are null. Built once during
     * class initialization and never modified afterwards.
     */
    private static final String[] XML_LOW_VALUE_ESCAPE_STRINGS = buildLowValueEscapeStrings();

    /**
     * Regular expression matching a run of backslashes (group 1), a
     * backslash-u marker (group 2) and exactly four hex digits (group 3).
     */
    private static final String UNICODE_UNESCAPE_MATCH_EXPRESSION = "(\\\\*)(\\\\u)(\\p{XDigit}{4})";

    /** Compiled form of {@link #UNICODE_UNESCAPE_MATCH_EXPRESSION}. */
    private static final Pattern UNESCAPE_PATTERN = Pattern.compile(UNICODE_UNESCAPE_MATCH_EXPRESSION);

    /**
     * Tells whether the given code point is a low-value control character that
     * the XML 1.0 specification does not allow in a document.
     *
     * @param c
     *            a code point
     * @return true if <code>c</code> is below 0x20 and is not tab (0x9), line
     *         feed (0xA) or carriage return (0xD)
     */
    private static boolean isInvalidXMLCharacter(int c) {
        return c >= 0x0 && c < XML_ALLOWED_LOW_CHARACTER_BOUND
                // low-value characters allowed by XML 1.0 spec
                && c != 0x9 && c != 0xA && c != 0xD;
    }

    /**
     * Builds the table of escape sequences for the disallowed control
     * characters: a backslash, the letter u and the code point as four
     * lowercase hex digits.
     */
    private static String[] buildLowValueEscapeStrings() {
        String[] table = new String[XML_ALLOWED_LOW_CHARACTER_BOUND];
        for (int i = 0; i < table.length; i++) {
            if (isInvalidXMLCharacter(i)) {
                table[i] = String.format("\\u%04x", i);
            }
        }
        return table;
    }

    /**
     * Retained for backward compatibility. The map of characters to be escaped
     * and their corresponding escape sequences is now built during class
     * initialization, so there is nothing left to do here and callers no
     * longer need to invoke this method.
     *
     * @see <a href="http://www.w3.org/TR/REC-xml/#charsets">Extensible Markup
     *      Language (XML) 1.0 (Fifth Edition)</a>
     */
    public static void initializeEscapeMap() {
        // Intentionally empty: the escape table is a static final constant.
    }

    /**
     * Escape XML entities and illegal characters in the given string. This
     * enhances the functionality of
     * org.apache.commons.lang.StringEscapeUtils.escapeXml by escaping
     * low-valued unprintable characters, which are not permitted by the W3C XML
     * 1.0 specification. Each such character is replaced by a backslash, the
     * letter u and its code point as four lowercase hex digits; the result is
     * then passed through StringEscapeUtils.escapeXml.
     *
     * @param s
     *            a string
     * @return the same string with characters not permitted by the XML
     *         specification escaped; null or empty input is returned as is
     * @see <a href="http://www.w3.org/TR/REC-xml/#charsets">Extensible Markup
     *      Language (XML) 1.0 (Fifth Edition)</a>
     * @see <a
     *      href="http://commons.apache.org/lang/api/org/apache/commons/lang/StringEscapeUtils.html#escapeXml(java.lang.String)">org.apache.commons.lang.StringEscapeUtils
     *      javadoc</a>
     */
    public static String escapeXml(String s) {
        if (s == null || s.length() == 0) {
            return s;
        }

        final int length = s.length();
        // Allocated only once a control character is actually found.
        StringBuilder sb = null;
        int copiedUpTo = 0;
        for (int i = 0; i < length; i++) {
            char c = s.charAt(i);
            if (isInvalidXMLCharacter(c)) {
                if (sb == null) {
                    sb = new StringBuilder(length + 16);
                }
                // append the untouched text since the previous replacement
                sb.append(s, copiedUpTo, i);
                // substitute control character with escape sequence
                sb.append(XML_LOW_VALUE_ESCAPE_STRINGS[c]);
                // advance past this character
                copiedUpTo = i + 1;
            }
        }

        if (sb == null) {
            // nothing to substitute; only the standard entities need escaping
            return StringEscapeUtils.escapeXml(s);
        }
        sb.append(s, copiedUpTo, length);
        return StringEscapeUtils.escapeXml(sb.toString());
    }

    /**
     * Retained for backward compatibility. The regular expression used in
     * unescaping is now compiled during class initialization, so a
     * {@link PatternSyntaxException} can no longer surface here and this
     * method always succeeds.
     *
     * @return always true
     */
    public static boolean initializeUnescapePattern() {
        return true;
    }

    /**
     * Unescape XML entities and illegal characters in the given string. This
     * enhances the functionality of
     * org.apache.commons.lang.StringEscapeUtils.unescapeXml by unescaping
     * low-valued unprintable characters, which are not permitted by the W3C XML
     * 1.0 specification.
     *
     * <p>
     * An escape sequence is converted back only if it denotes one of the
     * disallowed control characters and is preceded by an even number
     * (possibly zero) of backslashes; those backslashes are kept unchanged.
     * Every other sequence is left exactly as it was.
     * </p>
     *
     * @param s
     *            a string
     * @return the same string with XML entities/escape sequences unescaped;
     *         null or empty input is returned as is
     * @see <a href="http://www.w3.org/TR/REC-xml/#charsets">Extensible Markup
     *      Language (XML) 1.0 (Fifth Edition)</a>
     * @see <a
     *      href="http://commons.apache.org/lang/api/org/apache/commons/lang/StringEscapeUtils.html#unescapeXml(java.lang.String)">org.apache.commons.lang.StringEscapeUtils
     *      javadoc</a>
     */
    public static String unescapeXml(String s) {
        if (s == null || s.length() == 0) {
            return s;
        }

        /*
         * skip this expensive check entirely if there are no substrings
         * resembling Unicode escape sequences in the string to be unescaped
         */
        if (s.contains("\\u")) {
            // Matcher.appendReplacement only accepts a StringBuffer before
            // Java 9, hence no StringBuilder here.
            StringBuffer unescaped = new StringBuffer(s.length());
            Matcher m = UNESCAPE_PATTERN.matcher(s);
            while (m.find()) {
                // group 1 always participates in a match (it may be empty)
                String slashes = m.group(1);
                // the pattern guarantees exactly four hex digits
                int escapeCode = Integer.parseInt(m.group(3), 16);

                if (slashes.length() % 2 == 0 && isInvalidXMLCharacter(escapeCode)) {
                    /*
                     * appendReplacement treats backslashes and dollar signs
                     * in the replacement as special, so quote the literal
                     * text to keep the leading backslashes intact
                     */
                    m.appendReplacement(unescaped, Matcher.quoteReplacement(slashes + (char) escapeCode));
                }
                // otherwise the match is copied verbatim by a later append
            }
            m.appendTail(unescaped);
            s = unescaped.toString();
        }
        return StringEscapeUtils.unescapeXml(s);
    }

    /**
     * Escapes backslashes, carriage returns and line feeds so the string fits
     * on a single line: each backslash is doubled, a carriage return becomes
     * backslash-r and a line feed becomes backslash-n.
     *
     * @param s
     *            a string
     * @return the escaped string
     * @throws NullPointerException
     *             if <code>s</code> is null
     */
    public static String escapeLFCRBackSlash(String s) {
        // backslashes first, so the ones introduced below are not doubled
        s = s.replace("\\", "\\\\");
        s = s.replace("\r", "\\r");
        s = s.replace("\n", "\\n");
        return s;
    }
}